Project Introduction

Loading library

library(ggplot2)
library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v tibble  1.4.2     v purrr   0.2.5
## v tidyr   0.8.1     v dplyr   0.7.6
## v readr   1.1.1     v stringr 1.3.1
## v tibble  1.4.2     v forcats 0.3.0
## -- Conflicts ---------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(stats)
library(forecast)

Project Description

Loading data. Downloaded from MySQL to the folder.

# Upload raw electric load and forecast data from .xlsx file
df <- read.csv("MergedData.csv")

Initial plotting

??? check it out whether we need to convert to LoadDate from factor to date/time format

# Create data frames more suiteable for time series

#df$LoadDate <- as.POSIXct((df$LoadDate)) #this line is not working  
#df$PTID <- as.factor(df$PTID)

It can be observed from the above two graphs that the maximum and minimum values for max_temp and min_temp are following a consistent pattern over time.

ggplot(data = df,
       aes(x = MaxTemp, y = TotalDailyPowerLoad, color = LoadZone)) +
  geom_jitter(width = 0.15) +
  guides(fill = TRUE) +
  stat_smooth(aes(group = 1), method = 'lm', formula = y ~ poly(x, 2, raw=TRUE)) +
  ggtitle(label = "Relationship Between Max Temp and Actual Load MWh",
          subtitle = "Color Encodes Various Zones") +
  xlab("Maximum Temperature (in Degrees Farenheight)") + ylab("TotalDailyPowerLoad (MWh)")

Plot Max and Min temp VS TotalDailyPowerLoad

Plotting function for both max and min temperature VS TotalDailyPowerLoad.

#plotting function for max and min temp VS actual load 
LoadPlot <- function(zoneName){
  #filtering for single zone
  load.1zone <- df %>% filter(LoadZone == zoneName)
  
  #plotting data #create plot for MAX temp
  p1<-
  ggplot(data = load.1zone, aes(x = MaxTemp, y = TotalDailyPowerLoad)) +
  geom_jitter(width = 0.15) +
  guides(fill = TRUE) +
  stat_smooth(aes(group = 1), method = 'lm', formula = y ~ poly(x, 2, raw=TRUE)) +
  ggtitle(label = zoneName) +
  xlab("Max Temp") + ylab("TotalDailyPowerLoad(MWh) ")
  print (p1)
  #fitting lm with 2nd degree poly feature 
  oneZone_lm1 <- lm(formula = TotalDailyPowerLoad ~ poly(MaxTemp, 2, raw=TRUE), data = load.1zone)
  print (oneZone_lm1)

  #create plot for MIN temp
  p2<-
  ggplot(data = load.1zone, aes(x = MinTemp, y = TotalDailyPowerLoad)) +
  geom_jitter(width = 0.15) +
  guides(fill = TRUE) +
  stat_smooth(aes(group = 1), method = 'lm', formula = y ~ poly(x, 2, raw=TRUE)) +
  ggtitle(label = zoneName) +
  xlab("Min Temp") + ylab("TotalDailyPowerLoad(MWh) ")
  
  print (p2)
  #fitting lm with 2nd degree poly feature 
  oneZone_lm2 <- lm(formula = TotalDailyPowerLoad ~ poly(MinTemp, 2, raw=TRUE), data = load.1zone)
  print (oneZone_lm2)
return()
}
NYISO zones.

NYISO zones.

max and min plot for zones.

#just put zoneName in the function LoadPlot
LoadPlot('WEST')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MaxTemp, 2, raw = TRUE)1  
##                     58967.509                       -680.129  
## poly(MaxTemp, 2, raw = TRUE)2  
##                         6.234

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MinTemp, 2, raw = TRUE)1  
##                     51509.612                       -550.382  
## poly(MinTemp, 2, raw = TRUE)2  
##                         6.968
## NULL
LoadPlot('GENESE')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MaxTemp, 2, raw = TRUE)1  
##                     41431.848                       -617.494  
## poly(MaxTemp, 2, raw = TRUE)2  
##                         5.714

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MinTemp, 2, raw = TRUE)1  
##                     33644.244                       -462.638  
## poly(MinTemp, 2, raw = TRUE)2  
##                         6.252
## NULL
LoadPlot('CENTRL')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MaxTemp, 2, raw = TRUE)1  
##                     70300.270                       -988.399  
## poly(MaxTemp, 2, raw = TRUE)2  
##                         8.329

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MinTemp, 2, raw = TRUE)1  
##                     55287.493                       -679.653  
## poly(MinTemp, 2, raw = TRUE)2  
##                         8.487
## NULL
LoadPlot('NORTH')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MaxTemp, 2, raw = TRUE)1  
##                    19222.1468                      -154.7824  
## poly(MaxTemp, 2, raw = TRUE)2  
##                        0.9519

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MinTemp, 2, raw = TRUE)1  
##                    16878.1122                      -112.1225  
## poly(MinTemp, 2, raw = TRUE)2  
##                        0.7356
## NULL
LoadPlot('MHK VL')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MaxTemp, 2, raw = TRUE)1  
##                     35645.624                       -530.600  
## poly(MaxTemp, 2, raw = TRUE)2  
##                         4.501

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MinTemp, 2, raw = TRUE)1  
##                     27210.088                       -331.307  
## poly(MinTemp, 2, raw = TRUE)2  
##                         4.057
## NULL
LoadPlot('CAPITL')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MaxTemp, 2, raw = TRUE)1  
##                     53424.591                       -860.742  
## poly(MaxTemp, 2, raw = TRUE)2  
##                         7.834

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MinTemp, 2, raw = TRUE)1  
##                     40525.529                       -566.965  
## poly(MinTemp, 2, raw = TRUE)2  
##                         7.791
## NULL
LoadPlot('HUD VL')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MaxTemp, 2, raw = TRUE)1  
##                       47674.1                         -903.7  
## poly(MaxTemp, 2, raw = TRUE)2  
##                           8.5

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MinTemp, 2, raw = TRUE)1  
##                     33782.441                       -569.197  
## poly(MinTemp, 2, raw = TRUE)2  
##                         8.434
## NULL
LoadPlot('MILLWD')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MaxTemp, 2, raw = TRUE)1  
##                     15750.670                       -308.434  
## poly(MaxTemp, 2, raw = TRUE)2  
##                         2.665

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MinTemp, 2, raw = TRUE)1  
##                     11408.218                       -226.666  
## poly(MinTemp, 2, raw = TRUE)2  
##                         2.852
## NULL
LoadPlot('DUNWOD')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MaxTemp, 2, raw = TRUE)1  
##                     30295.227                       -606.495  
## poly(MaxTemp, 2, raw = TRUE)2  
##                         5.809

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MinTemp, 2, raw = TRUE)1  
##                     23338.062                       -475.231  
## poly(MinTemp, 2, raw = TRUE)2  
##                         6.422
## NULL
LoadPlot('N.Y.C.')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MaxTemp, 2, raw = TRUE)1  
##                     259279.25                       -5006.91  
## poly(MaxTemp, 2, raw = TRUE)2  
##                         47.17

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MinTemp, 2, raw = TRUE)1  
##                     213279.54                       -4280.61  
## poly(MinTemp, 2, raw = TRUE)2  
##                         52.55
## NULL
LoadPlot('LONGIL')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MaxTemp, 2, raw = TRUE)1  
##                      121811.1                        -2758.7  
## poly(MaxTemp, 2, raw = TRUE)2  
##                          26.3

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinTemp, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                   (Intercept)  poly(MinTemp, 2, raw = TRUE)1  
##                      87765.55                       -2018.10  
## poly(MinTemp, 2, raw = TRUE)2  
##                         26.69
## NULL

From: http://apollo.lsc.vsc.edu/classes/met130/notes/chapter4/wet_bulb.html?dom=newscred&src=syn Impact of Humidity.

Ploting max Wet Bulb and min Wet bulb temperature VS TotalDailyPowerLoad

Plotting function for both Max and Min Wet bulb temperature VS TotalDailyPowerLoad.

#plotting function for max and min temp VS actual load 
LoadPlot <- function(zoneName){
  #filtering for single zone
  load.1zone <- df %>% filter(LoadZone == zoneName)
  
  #plotting data #create plot for MaxWetBulb temp
  p1<-
  ggplot(data = load.1zone, aes(x = MaxWetBulb, y = TotalDailyPowerLoad)) +
  geom_jitter(width = 0.15) +
  guides(fill = TRUE) +
  stat_smooth(aes(group = 1), method = 'lm', formula = y ~ poly(x, 2, raw=TRUE)) +
  ggtitle(label = zoneName) +
  xlab("MaxWetBulb Temp") + ylab("TotalDailyPowerLoad(MWh) ")
  print (p1)
  #fitting lm with 2nd degree poly feature 
  oneZone_lm1 <- lm(formula = TotalDailyPowerLoad ~ poly(MaxWetBulb, 2, raw=TRUE), data = load.1zone)
  print (oneZone_lm1)

  #create plot for MinWetBulb temp
  p2<-
  ggplot(data = load.1zone, aes(x = MinWetBulb, y = TotalDailyPowerLoad)) +
  geom_jitter(width = 0.15) +
  guides(fill = TRUE) +
  stat_smooth(aes(group = 1), method = 'lm', formula = y ~ poly(x, 2, raw=TRUE)) +
  ggtitle(label = zoneName) +
  xlab("MinWetBulb Temp") + ylab("TotalDailyPowerLoad(MWh) ")
  
  print (p2)
  #fitting lm with 2nd degree poly feature 
  oneZone_lm2 <- lm(formula = TotalDailyPowerLoad ~ poly(MinWetBulb, 2, raw=TRUE), data = load.1zone)
  print (oneZone_lm2)
return()
}
#just put zoneName in the function LoadPlot
LoadPlot('WEST')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                        59437.033                          -814.661  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                            8.752

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                        50852.801                          -556.665  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                            7.592
## NULL
LoadPlot('GENESE')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                        41733.958                          -736.911  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                            8.027

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                        33176.223                          -466.661  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                            6.703
## NULL
LoadPlot('CENTRL')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                          70817.8                           -1183.9  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                             11.8

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                        54753.585                          -689.685  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                            9.021
## NULL
LoadPlot('NORTH')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                          19288.0                            -181.4  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                              1.3

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                       16760.8141                         -120.0580  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                           0.9182
## NULL
LoadPlot('MHK VL')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                         35504.61                           -605.32  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                             5.96

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                        26946.560                          -336.139  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                            4.314
## NULL
LoadPlot('CAPITL')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                         52931.71                          -1006.37  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                            10.97

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                        39802.544                          -569.585  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                            8.414
## NULL
LoadPlot('HUD VL')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                         46869.99                          -1035.23  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                            11.57

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                        33032.518                          -556.611  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                            8.708
## NULL
LoadPlot('MILLWD')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                        15809.645                          -362.401  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                            3.645

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                        11103.455                          -221.242  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                            2.906
## NULL
LoadPlot('DUNWOD')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                         29330.81                           -674.39  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                             7.64

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                        21976.593                          -433.532  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                            6.371
## NULL
LoadPlot('N.Y.C.')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                        252830.26                          -5666.58  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                            63.18

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                        196962.14                          -3891.67  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                            53.88
## NULL
LoadPlot('LONGIL')

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                        116085.21                          -2971.70  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                            33.07

## 
## Call:
## lm(formula = TotalDailyPowerLoad ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                         82293.51                          -1863.42  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                            26.71
## NULL

max and min Wet bulb temperature VS PeakDailyLoadPerHour

Plotting function for both max Wet Bulb and min Wet bulb temperature VS PeakDailyLoadPerHour.

#plotting function for max and min temp VS actual load 
LoadPlot <- function(zoneName){
  #filtering for single zone
  load.1zone <- df %>% filter(LoadZone == zoneName)
  
  #plotting data #create plot for MaxWetBulb temp
  p1<-
  ggplot(data = load.1zone, aes(x = MaxWetBulb, y = PeakDailyLoadPerHour)) +
  geom_jitter(width = 0.15) +
  guides(fill = TRUE) +
  stat_smooth(aes(group = 1), method = 'lm', formula = y ~ poly(x, 2, raw=TRUE)) +
  ggtitle(label = zoneName) +
  xlab("MaxWetBulb Temp") + ylab("PeakDailyLoadPerHour(MWh) ")
  print (p1)
  #fitting lm with 2nd degree poly feature 
  oneZone_lm1 <- lm(formula = PeakDailyLoadPerHour ~ poly(MaxWetBulb, 2, raw=TRUE), data = load.1zone)
  print (oneZone_lm1)

  #create plot for MinWetBulb temp
  p2<-
  ggplot(data = load.1zone, aes(x = MinWetBulb, y = PeakDailyLoadPerHour)) +
  geom_jitter(width = 0.15) +
  guides(fill = TRUE) +
  stat_smooth(aes(group = 1), method = 'lm', formula = y ~ poly(x, 2, raw=TRUE)) +
  ggtitle(label = zoneName) +
  xlab("MinWetBulb Temp") + ylab("PeakDailyLoadPerHour(MWh) ")
  
  print (p2)
  #fitting lm with 2nd degree poly feature 
  oneZone_lm2 <- lm(formula = PeakDailyLoadPerHour ~ poly(MinWetBulb, 2, raw=TRUE), data = load.1zone)
  print (oneZone_lm2)
return()
}
#just put zoneName in the function LoadPlot
LoadPlot('WEST')

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                         2854.264                           -44.110  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                            0.494

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                        2381.6330                          -28.8785  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                           0.4182
## NULL
LoadPlot('GENESE')

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                        2098.7934                          -40.8725  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                           0.4573

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                        1617.0465                          -24.9125  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                           0.3733
## NULL
LoadPlot('CENTRL')

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                        3416.5440                          -60.7731  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                           0.6247

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                        2580.6431                          -33.6654  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                           0.4627
## NULL
LoadPlot('NORTH')

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                        852.34070                          -8.06511  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                          0.05881

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                        740.46268                          -5.32522  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                          0.04185
## NULL
LoadPlot('MHK VL')

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                        1666.5822                          -27.6392  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                           0.2768

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                         1274.009                           -15.004  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                            0.198
## NULL
LoadPlot('CAPITL')

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                        2586.3296                          -50.7498  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                           0.5615

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                        1917.7443                          -27.8152  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                           0.4215
## NULL
LoadPlot('HUD VL')

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                        2411.0805                          -57.3686  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                           0.6588

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                        1630.5891                          -28.9096  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                           0.4768
## NULL
LoadPlot('MILLWD')

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                         817.8380                          -19.5484  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                           0.2016

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                          559.829                           -11.429  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                            0.156
## NULL
LoadPlot('DUNWOD')

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                        1500.3043                          -36.1473  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                           0.4124

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                        1100.3617                          -22.7319  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                           0.3381
## NULL
LoadPlot('N.Y.C.')

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                        12041.618                          -271.348  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                            3.056

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                         9336.816                          -183.627  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                            2.581
## NULL
LoadPlot('LONGIL')

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MaxWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MaxWetBulb, 2, raw = TRUE)1  
##                         6098.551                          -161.872  
## poly(MaxWetBulb, 2, raw = TRUE)2  
##                            1.816

## 
## Call:
## lm(formula = PeakDailyLoadPerHour ~ poly(MinWetBulb, 2, raw = TRUE), 
##     data = load.1zone)
## 
## Coefficients:
##                      (Intercept)  poly(MinWetBulb, 2, raw = TRUE)1  
##                         4246.064                           -99.976  
## poly(MinWetBulb, 2, raw = TRUE)2  
##                            1.451
## NULL

Modeling and analysis

splitting data to train, test, validation set

Splitting the data frame.

#writing a function to do the job of splitting into any number of sample

split <- function(df, NumOfSplit, prob )
{
  idx <- sample(seq(1, NumOfSplit), size = nrow(df), replace = TRUE, prob = prob)
  z = list()
  for (i in 1:NumOfSplit)
    z[[i]] <- df[idx == i,]
  z
}

Call the function and split the df into 3 subset.

#calling the function for 3 split with .6, .2, .2 
z <- split(df, 3, c(0.6, .2, .2 ))

#split makes a list. therefore, needed to convert as.data.frame
train <- as.data.frame(z[1]) 
test <- as.data.frame(z[2])
validation <- as.data.frame(z[3])

Linear fitting + Forecast + Evaluation

1 PeakDailyLoadPerHour(Y) ~ MinWetBulb(X).. (Poly with degree 2)

This function is taking in a single zone name and returning the accuracy of the fitted model in three steps.

First, do the fitted model With the train data set, for single zone. PeakDailyLoadPerHour(Y) ~ MinWetBulb(X).. (Poly with degree 2)

second, do the forecast on the Validation set of that zone.

Third, calculate the accuracy measure on the validation set of that zone and returning error measures.

FitAndForecast.GetAccuracy <- function(zoneName){
  
  ##1 fitting model 
  load.1zone <- train %>% filter(LoadZone == zoneName)
  fit1 <- lm(formula = PeakDailyLoadPerHour ~ poly(MinWetBulb, 2, raw=TRUE), data = load.1zone)
   
  ##2 forecasting using the model on validation set 
  vld <- validation %>% filter(LoadZone == zoneName)
  newX <- data.frame(MinWetBulb = vld$MinWetBulb)
  newY <- as.data.frame(predict(fit1, newX , interval = "prediction"))
  
  ##3 checking accuracy in the validation set. 
  lm.fit.err <- accuracy(vld$PeakDailyLoadPerHour, newY$fit)
  
return(lm.fit.err)
}

Calling the function to get accuracy for Load Zones.

FitAndForecast.GetAccuracy('WEST')
##                 ME     RMSE      MAE        MPE     MAPE
## Test set -5.413044 152.7431 120.5306 -0.2246034 5.934469
FitAndForecast.GetAccuracy('GENESE')
##                  ME     RMSE      MAE        MPE     MAPE
## Test set -0.9604313 133.7194 105.8946 -0.1390076 7.924645
FitAndForecast.GetAccuracy('CENTRL')
##                ME     RMSE      MAE         MPE     MAPE
## Test set -2.61262 181.2099 146.0655 -0.06699796 6.940605
FitAndForecast.GetAccuracy('NORTH')
##                ME     RMSE      MAE       MPE     MAPE
## Test set 3.370263 115.5455 104.3715 0.5032198 16.78555
FitAndForecast.GetAccuracy('MHK VL')
##                ME     RMSE      MAE       MPE     MAPE
## Test set 2.520532 104.3347 83.85739 0.2518137 7.915282
FitAndForecast.GetAccuracy('CAPITL')
##                 ME     RMSE      MAE        MPE     MAPE
## Test set -7.135304 146.1974 116.1448 -0.4190969 7.109119
FitAndForecast.GetAccuracy('HUD VL')
##                ME     RMSE      MAE       MPE     MAPE
## Test set 5.056217 153.5835 122.7574 0.3216401 8.768748
FitAndForecast.GetAccuracy('MILLWD')
##                  ME    RMSE      MAE        MPE     MAPE
## Test set -0.6008198 52.9304 41.82481 -0.1362392 10.37565
FitAndForecast.GetAccuracy('DUNWOD')
##                 ME     RMSE      MAE       MPE     MAPE
## Test set -6.870814 92.64358 70.71814 -0.873613 8.292068
FitAndForecast.GetAccuracy('N.Y.C.')
##               ME     RMSE     MAE       MPE     MAPE
## Test set 29.8879 662.9208 532.853 0.3592392 7.495659
FitAndForecast.GetAccuracy('LONGIL')
##                ME     RMSE      MAE       MPE     MAPE
## Test set 31.39448 358.0336 283.6268 0.7547435 9.161492

2 PeakDailyLoadPerHour(Y) ~ MaxWetBulb(X).. (Poly with degree 2)

This function is taking in a single zone name and returning the accuracy of the fitted model in three steps.

First, do the fitted model With the train data set, for single zone. PeakDailyLoadPerHour(Y) ~ MaxWetBulb(X).. (Poly with degree 2)

second, do the forecast on the Validation set of that zone.

Third, calculate the accuracy measure on the validation set of that zone and returning error measures.

FitAndForecast.GetAccuracy2 <- function(zoneName){
  
  ##1 fitting model 
  load.1zone <- train %>% filter(LoadZone == zoneName)
  fit1 <- lm(formula = PeakDailyLoadPerHour ~ poly(MaxWetBulb, 2, raw=TRUE), data = load.1zone)
   
  ##2 forecasting using the model on validation set 
  vld <- validation %>% filter(LoadZone == zoneName)
  newX <- data.frame(MaxWetBulb = vld$MaxWetBulb)
  newY <- as.data.frame(predict(fit1, newX , interval = "prediction"))
  
  ##3 checking accuracy in the validation set. 
  lm.fit.err <- accuracy(vld$PeakDailyLoadPerHour, newY$fit)
  
return(lm.fit.err)
}

Calling the function to get accuracy for Load Zones.

FitAndForecast.GetAccuracy2('WEST')
##                ME     RMSE      MAE       MPE     MAPE
## Test set -5.39369 154.4245 120.8294 -0.217008 5.952313
FitAndForecast.GetAccuracy2('GENESE')
##                 ME     RMSE      MAE        MPE     MAPE
## Test set -2.081136 133.7641 103.0909 -0.2465104 7.701083
FitAndForecast.GetAccuracy2('CENTRL')
##                ME     RMSE     MAE       MPE     MAPE
## Test set 4.058241 172.4326 140.176 0.2443172 6.645762
FitAndForecast.GetAccuracy2('NORTH')
##               ME     RMSE     MAE       MPE     MAPE
## Test set 2.86272 116.3961 105.256 0.4192049 16.94907
FitAndForecast.GetAccuracy2('MHK VL')
##                ME     RMSE      MAE       MPE     MAPE
## Test set 1.471857 104.7767 83.98497 0.1486672 7.934183
FitAndForecast.GetAccuracy2('CAPITL')
##                 ME     RMSE      MAE        MPE     MAPE
## Test set -8.436926 145.7763 115.2504 -0.5467227 7.061169
FitAndForecast.GetAccuracy2('HUD VL')
##               ME     RMSE      MAE       MPE     MAPE
## Test set 2.52361 145.0869 117.1187 0.1608239 8.409823
FitAndForecast.GetAccuracy2('MILLWD')
##                 ME     RMSE      MAE       MPE     MAPE
## Test set -2.465989 51.46572 41.18236 -0.540367 10.28728
FitAndForecast.GetAccuracy2('DUNWOD')
##                 ME     RMSE      MAE        MPE     MAPE
## Test set -3.841505 91.87184 72.71571 -0.5125774 8.511766
FitAndForecast.GetAccuracy2('N.Y.C.')
##                ME     RMSE     MAE       MPE     MAPE
## Test set 50.03386 678.2316 541.915 0.6636107 7.575088
FitAndForecast.GetAccuracy2('LONGIL')
##              ME    RMSE      MAE       MPE     MAPE
## Test set 35.597 376.669 298.3645 0.7874918 9.622758

3 TotalDailyPowerLoad(Y) ~ MaxWetBulb(X).. (Poly with degree 2)

This function is taking in a single zone name and returning the accuracy of the fitted model in three steps.

First, do the fitted model With the train data set, for single zone. TotalDailyPowerLoad(Y) ~ MaxWetBulb(X).. (Poly with degree 2)

second, do the forecast on the Validation set of that zone.

Third, calculate the accuracy measure on the validation set of that zone and returning error measures.

FitAndForecast.GetAccuracy3 <- function(zoneName){
  
  ##1 fitting model 
  load.1zone <- train %>% filter(LoadZone == zoneName)
  fit1 <- lm(formula = TotalDailyPowerLoad ~ poly(MaxWetBulb, 2, raw=TRUE), data = load.1zone)
   
  ##2 forecasting using the model on validation set 
  vld <- validation %>% filter(LoadZone == zoneName)
  newX <- data.frame(MaxWetBulb = vld$MaxWetBulb)
  newY <- as.data.frame(predict(fit1, newX , interval = "prediction"))
  
  ##3 checking accuracy in the validation set. 
  lm.fit.err <- accuracy(vld$TotalDailyPowerLoad, newY$fit)
  
return(lm.fit.err)
}

Calling the function to get accuracy for Load Zones.

FitAndForecast.GetAccuracy3('WEST')
##                 ME     RMSE     MAE         MPE     MAPE
## Test set -49.92723 2876.204 2259.02 -0.07954244 5.238982
FitAndForecast.GetAccuracy3('GENESE')
##                ME     RMSE      MAE         MPE    MAPE
## Test set -10.0417 2251.219 1726.061 -0.07453841 6.29091
FitAndForecast.GetAccuracy3('CENTRL')
##                ME     RMSE      MAE       MPE     MAPE
## Test set 127.9998 3163.947 2563.706 0.3323859 5.775595
FitAndForecast.GetAccuracy3('NORTH')
##                ME     RMSE      MAE       MPE     MAPE
## Test set 70.71218 2760.585 2510.163 0.4618102 17.90048
FitAndForecast.GetAccuracy3('MHK VL')
##                ME     RMSE      MAE       MPE     MAPE
## Test set 32.43152 2001.107 1596.299 0.1649741 7.279977
FitAndForecast.GetAccuracy3('CAPITL')
##                 ME    RMSE      MAE        MPE     MAPE
## Test set -200.9979 2654.58 2076.893 -0.6081936 6.188198
FitAndForecast.GetAccuracy3('HUD VL')
##                ME     RMSE      MAE        MPE     MAPE
## Test set 2.607919 2297.924 1824.008 0.01488751 6.633434
FitAndForecast.GetAccuracy3('MILLWD')
##                ME     RMSE      MAE        MPE     MAPE
## Test set -45.2124 915.6907 713.3045 -0.5119316 9.183784
FitAndForecast.GetAccuracy3('DUNWOD')
##                 ME     RMSE     MAE        MPE     MAPE
## Test set -79.47041 1590.632 1231.98 -0.4956704 7.202006
FitAndForecast.GetAccuracy3('N.Y.C.')
##                ME     RMSE      MAE       MPE     MAPE
## Test set 776.8983 12649.56 9989.987 0.5051881 6.695591
FitAndForecast.GetAccuracy3('LONGIL')
##                ME     RMSE      MAE       MPE     MAPE
## Test set 549.1906 6063.775 4707.085 0.6882973 7.837102

4 <>> TotalDailyPowerLoad(Y) ~ MaxWetBulb(x1), MinWetBulb(x2).. (Poly with degree 2)

This function is taking in a single zone name and returning the accuracy of the fitted model in three steps.

First, do the fitted model With the train data set, for single zone. TotalDailyPowerLoad(Y) ~ MaxWetBulb(x1), MinWetBulb(x2).. (Poly with degree 2)

second, do the forecast on the Validation set of that zone.

Third, calculate the accuracy measure on the validation set of that zone and returning error measures.

Calling the function to get accuracy for Load Zones.